import pandas as pd
import plotly.express as plx
from raceplotly.plots import barplot
from tqdm import tqdm
import warnings
from datetime import datetime, timedelta
import plotly.io as pio
pio.renderers.default = "notebook"
warnings.filterwarnings("ignore")
comp_df = pd.read_csv('complaints.csv/complaints.csv',low_memory=False)
comp_df
| Date received | Product | Sub-product | Issue | Sub-issue | Consumer complaint narrative | Company public response | Company | State | ZIP code | Tags | Consumer consent provided? | Submitted via | Date sent to company | Company response to consumer | Timely response? | Consumer disputed? | Complaint ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2024-03-13 | Credit card | General-purpose credit card or charge card | Advertising and marketing, including promotion... | Didn't receive advertised or promotional terms | I formally request the removal of an inaccurat... | NaN | EQUIFAX, INC. | MO | 631XX | NaN | Consent provided | Web | 2024-03-13 | Closed with non-monetary relief | Yes | NaN | 8538710 |
| 1 | 2024-03-14 | Credit reporting or other personal consumer re... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | NaN | EQUIFAX, INC. | PA | 19142 | NaN | Consent not provided | Web | 2024-03-14 | Closed with non-monetary relief | Yes | NaN | 8551289 |
| 2 | 2023-10-25 | Credit reporting or other personal consumer re... | Credit reporting | Improper use of your report | Reporting company used your report improperly | NaN | Company has responded to the consumer and the ... | TRANSUNION INTERMEDIATE HOLDINGS, INC. | CA | 92114 | NaN | Consent not provided | Web | 2023-10-25 | Closed with explanation | Yes | NaN | 7755832 |
| 3 | 2023-10-25 | Mortgage | FHA mortgage | Struggling to pay mortgage | An existing modification, forbearance plan, sh... | NaN | Company has responded to the consumer and the ... | Specialized Loan Servicing Holdings LLC | CA | 956XX | NaN | Other | Web | 2023-10-25 | Closed with explanation | Yes | NaN | 7753166 |
| 4 | 2024-05-02 | Money transfer, virtual currency, or money ser... | Domestic (US) money transfer | Money was not available when promised | NaN | NaN | NaN | Sigue Corp. | TX | 78644 | NaN | Other | Web | 2024-05-02 | In progress | Yes | NaN | 8916876 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5242995 | 2023-08-02 | Credit reporting, credit repair services, or o... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | NaN | EQUIFAX, INC. | TX | 76036 | NaN | Other | Web | 2023-08-02 | Closed with non-monetary relief | Yes | NaN | 7337991 |
| 5242996 | 2023-08-02 | Payday loan, title loan, or personal loan | Installment loan | Problem with additional add-on products or ser... | NaN | NaN | NaN | NBT BANCORP INC. | NJ | 08046 | NaN | NaN | Phone | 2023-08-02 | Closed with explanation | Yes | NaN | 7343156 |
| 5242997 | 2023-08-02 | Mortgage | FHA mortgage | Struggling to pay mortgage | NaN | NaN | Company believes it acted appropriately as aut... | Chesmar Homes | TX | 77316 | NaN | Other | Web | 2023-08-17 | Closed with explanation | Yes | NaN | 7340709 |
| 5242998 | 2023-08-02 | Credit reporting, credit repair services, or o... | Credit reporting | Improper use of your report | Reporting company used your report improperly | NaN | NaN | EQUIFAX, INC. | GA | 30013 | NaN | Consent not provided | Web | 2023-08-02 | Closed with explanation | Yes | NaN | 7341166 |
| 5242999 | 2023-08-04 | Payday loan, title loan, or personal loan | Payday loan | Charged fees or interest you didn't expect | NaN | NaN | NaN | Flandreau Santee Sioux Tribe | TX | 75057 | NaN | Consent not provided | Web | 2023-08-18 | Closed with explanation | Yes | NaN | 7350142 |
5243000 rows × 18 columns
comp_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5243000 entries, 0 to 5242999 Data columns (total 18 columns): # Column Dtype --- ------ ----- 0 Date received object 1 Product object 2 Sub-product object 3 Issue object 4 Sub-issue object 5 Consumer complaint narrative object 6 Company public response object 7 Company object 8 State object 9 ZIP code object 10 Tags object 11 Consumer consent provided? object 12 Submitted via object 13 Date sent to company object 14 Company response to consumer object 15 Timely response? object 16 Consumer disputed? object 17 Complaint ID int64 dtypes: int64(1), object(17) memory usage: 720.0+ MB
col_name, total_null= [], []
for x in comp_df.columns:
col_name.append(x)
total_null.append(sum(comp_df[x].isnull()))
df_null_info = pd.DataFrame()
df_null_info['column_name'], df_null_info['Total_null'] = col_name, total_null
df_null_info
| column_name | Total_null | |
|---|---|---|
| 0 | Date received | 0 |
| 1 | Product | 0 |
| 2 | Sub-product | 235292 |
| 3 | Issue | 3 |
| 4 | Sub-issue | 736164 |
| 5 | Consumer complaint narrative | 3406287 |
| 6 | Company public response | 2729863 |
| 7 | Company | 0 |
| 8 | State | 45780 |
| 9 | ZIP code | 30225 |
| 10 | Tags | 4757083 |
| 11 | Consumer consent provided? | 1041536 |
| 12 | Submitted via | 0 |
| 13 | Date sent to company | 0 |
| 14 | Company response to consumer | 15 |
| 15 | Timely response? | 0 |
| 16 | Consumer disputed? | 4474684 |
| 17 | Complaint ID | 0 |
plx.bar(df_null_info, x = 'column_name', y='Total_null')
col_name, total_unique = [], []
for x in comp_df.columns:
col_name.append(x)
total_unique.append(comp_df[x].nunique())
df_unique_count = pd.DataFrame()
df_unique_count['column_name'], df_unique_count['Total_unique'] = col_name, total_unique
df_unique_count
| column_name | Total_unique | |
|---|---|---|
| 0 | Date received | 4550 |
| 1 | Product | 21 |
| 2 | Sub-product | 86 |
| 3 | Issue | 178 |
| 4 | Sub-issue | 272 |
| 5 | Consumer complaint narrative | 1492267 |
| 6 | Company public response | 11 |
| 7 | Company | 7220 |
| 8 | State | 63 |
| 9 | ZIP code | 33657 |
| 10 | Tags | 3 |
| 11 | Consumer consent provided? | 4 |
| 12 | Submitted via | 7 |
| 13 | Date sent to company | 4499 |
| 14 | Company response to consumer | 8 |
| 15 | Timely response? | 2 |
| 16 | Consumer disputed? | 2 |
| 17 | Complaint ID | 5243000 |
fig = plx.bar(pd.DataFrame(comp_df['Date received'].value_counts()))
fig.update_traces(dict(marker_line_width=0))
fig.update_layout(
title="No of Complaints in a day over time",
xaxis_title="Time",
yaxis_title="Total Complaints",
font=dict(
family="Courier New, monospace",
size=13,
color="RebeccaPurple"
)
)
fig.show()
df_1 = pd.DataFrame(comp_df['Product'].value_counts(dropna=False))
fig = plx.pie(df_1, values=df_1['Product'], names=df_1.index)
fig.update_layout(
title="Total number of complaints for the desired product",
font=dict(
family="Courier New, monospace",
size=13,
color="RebeccaPurple"
)
)
fig.show()
fig = plx.bar(pd.DataFrame(comp_df['Sub-product'].value_counts() ), orientation='h' )
fig.update_layout(
title="Total number of complaints for the desired Sub-product",
height = 700,
font=dict(
family="Courier New, monospace",
size=13,
color="RebeccaPurple"
)
)
fig.show()
fig = plx.bar(pd.DataFrame(comp_df['Issue'].value_counts(dropna=False) ), orientation='h' )
fig.update_layout(
title="Total number of complaints for the desired Issue",
height = 750,
font=dict(
family="Courier New, monospace",
size=13,
color="RebeccaPurple"
)
)
fig.show()
fig = plx.bar(pd.DataFrame(comp_df['Sub-issue'].value_counts(dropna=False) ), orientation='h' )
fig.update_layout(
title="Total number of complaints for the desired Sub-issue",
font=dict(
family="Courier New, monospace",
size=12,
color="RebeccaPurple"
)
)
fig.show()
df_2 = pd.DataFrame(comp_df['Company public response'].value_counts(dropna=False))
fig = plx.pie(df_2, values=df_2['Company public response'], names=df_2.index)
fig.update_layout(
title="Total number of complaints for the desired \"Company public response\" " ,
margin=dict(t=0, b=0, l=0, r=0),
font=dict(
family="Courier New, monospace",
size=11,
color="RebeccaPurple"
)
)
fig.show()
df_3 = pd.DataFrame(comp_df['Company'].value_counts() )
df_3 = df_3.reset_index()
df_3['Company'], df_3['count'] = df_3['index'], df_3['Company']
df_3.drop(['index'], 1, inplace=True)
df_3['contribution %'] = [((x/len(comp_df)) * 100) for x in df_3['count']]
df_3.head(5)
| Company | count | contribution % | |
|---|---|---|---|
| 0 | EQUIFAX, INC. | 1094863 | 20.882377 |
| 1 | TRANSUNION INTERMEDIATE HOLDINGS, INC. | 1016332 | 19.384551 |
| 2 | Experian Information Solutions Inc. | 925932 | 17.660347 |
| 3 | BANK OF AMERICA, NATIONAL ASSOCIATION | 140733 | 2.684208 |
| 4 | WELLS FARGO & COMPANY | 128879 | 2.458116 |
df_4 = pd.DataFrame(comp_df['State'].value_counts() )
df_4 = df_4.reset_index()
df_4['State'], df_4['count'] = df_4['index'], df_4['State']
df_4.drop(['index'], 1, inplace=True)
df_4
| State | count | |
|---|---|---|
| 0 | FL | 629941 |
| 1 | CA | 597278 |
| 2 | TX | 560364 |
| 3 | GA | 360174 |
| 4 | NY | 336721 |
| ... | ... | ... |
| 58 | AA | 59 |
| 59 | MP | 57 |
| 60 | AS | 50 |
| 61 | MH | 33 |
| 62 | PW | 13 |
63 rows × 2 columns
fig = plx.choropleth(df_4, locations='State',locationmode='USA-states', color='count',
color_continuous_scale="Viridis",
range_color=(0, max(df_4['count'])),
scope="north america",
labels={'Complaints': 'Complaints Count'}
)
fig.update_layout(
title = "Total number of complaints for the \"State\" in USA " ,
height = 700,
margin = dict(t=0, b=0, l=0, r=0),
font = dict(
family="Courier New, monospace",
size=11,
color="RebeccaPurple"
)
)
fig.show()
df_5 = pd.DataFrame(comp_df['ZIP code'].value_counts() )
df_5 = df_5.reset_index()
df_5['ZIP code'], df_5['count'] = df_5['index'], df_5['ZIP code']
df_5['contribution %'] = [((x/len(comp_df)) * 100) for x in df_5['count']]
df_5.drop(['index'], 1, inplace=True)
df_5
| ZIP code | count | contribution % | |
|---|---|---|---|
| 0 | XXXXX | 120483 | 2.297978 |
| 1 | 30349 | 9723 | 0.185447 |
| 2 | 19143 | 8231 | 0.156990 |
| 3 | 33025 | 7345 | 0.140092 |
| 4 | 33311 | 6725 | 0.128266 |
| ... | ... | ... | ... |
| 33652 | 46200 | 1 | 0.000019 |
| 33653 | 87470 | 1 | 0.000019 |
| 33654 | 85062 | 1 | 0.000019 |
| 33655 | 98141 | 1 | 0.000019 |
| 33656 | 98933 | 1 | 0.000019 |
33657 rows × 3 columns
df_5_temp_1 = pd.DataFrame(comp_df)
df_5_temp_1 = df_5_temp_1.where(df_5_temp_1['ZIP code'].str.contains('X'))
df_5_temp_1.dropna(how='all',inplace=True)
df_5_temp_1
| Date received | Product | Sub-product | Issue | Sub-issue | Consumer complaint narrative | Company public response | Company | State | ZIP code | Tags | Consumer consent provided? | Submitted via | Date sent to company | Company response to consumer | Timely response? | Consumer disputed? | Complaint ID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2024-03-13 | Credit card | General-purpose credit card or charge card | Advertising and marketing, including promotion... | Didn't receive advertised or promotional terms | I formally request the removal of an inaccurat... | NaN | EQUIFAX, INC. | MO | 631XX | NaN | Consent provided | Web | 2024-03-13 | Closed with non-monetary relief | Yes | NaN | 8538710.0 |
| 3 | 2023-10-25 | Mortgage | FHA mortgage | Struggling to pay mortgage | An existing modification, forbearance plan, sh... | NaN | Company has responded to the consumer and the ... | Specialized Loan Servicing Holdings LLC | CA | 956XX | NaN | Other | Web | 2023-10-25 | Closed with explanation | Yes | NaN | 7753166.0 |
| 13 | 2024-01-19 | Credit reporting or other personal consumer re... | Credit reporting | Improper use of your report | Reporting company used your report improperly | 15USC 1681 a ( d ) ( 2 ) ( A ) ( i ) clearly s... | Company has responded to the consumer and the ... | Experian Information Solutions Inc. | MD | 218XX | NaN | Consent provided | Web | 2024-01-19 | Closed with explanation | Yes | NaN | 8183526.0 |
| 15 | 2024-05-03 | Credit reporting or other personal consumer re... | Credit reporting | Problem with a company's investigation into an... | Their investigation did not fix an error on yo... | NaN | NaN | Experian Information Solutions Inc. | NY | 117XX | NaN | NaN | Web | 2024-05-03 | In progress | Yes | NaN | 8913449.0 |
| 20 | 2023-09-15 | Credit reporting or other personal consumer re... | Credit reporting | Incorrect information on your report | Information belongs to someone else | I am writing to formally dispute and request c... | Company has responded to the consumer and the ... | Experian Information Solutions Inc. | MO | 630XX | NaN | Consent provided | Web | 2023-09-15 | Closed with non-monetary relief | Yes | NaN | 7549549.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5242837 | 2021-10-16 | Credit card or prepaid card | General-purpose credit card or charge card | Closing your account | Company closed your account | I sent Capital One written correspondent about... | NaN | CAPITAL ONE FINANCIAL CORPORATION | TX | 799XX | NaN | Consent provided | Web | 2021-10-16 | Closed with explanation | Yes | NaN | 4815687.0 |
| 5242838 | 2021-10-16 | Money transfer, virtual currency, or money ser... | Mobile or digital wallet | Managing, opening, or closing your mobile wall... | NaN | NaN | NaN | Paypal Holdings, Inc | NaN | XXXXX | NaN | Consent not provided | Web | 2021-10-16 | Closed with explanation | Yes | NaN | 4814403.0 |
| 5242840 | 2021-10-16 | Debt collection | I do not know | Written notification about debt | Didn't receive notice of right to dispute | NaN | NaN | The CBE Group, Inc. | MN | 563XX | NaN | Other | Web | 2021-10-16 | Closed with explanation | Yes | NaN | 4815034.0 |
| 5242844 | 2021-10-18 | Debt collection | Other debt | Attempts to collect debt not owed | Debt was result of identity theft | NaN | NaN | Bread Financial Holdings, Inc. | WV | 253XX | NaN | Other | Web | 2021-10-18 | Closed with explanation | Yes | NaN | 4820567.0 |
| 5242891 | 2021-11-20 | Mortgage | Conventional home mortgage | Struggling to pay mortgage | NaN | NaN | NaN | 21ST MORTGAGE CORP. | AR | 716XX | NaN | Other | Web | 2021-11-20 | Closed with explanation | Yes | NaN | 4932173.0 |
549537 rows × 18 columns
df_5_temp_2 = pd.DataFrame(df_5_temp_1['State'].value_counts() )
df_5_temp_2 = df_5_temp_2.reset_index()
df_5_temp_2['State'], df_5_temp_2['hidden_count'] = df_5_temp_2['index'], df_5_temp_2['State']
total_complaints = []
for x in tqdm(df_5_temp_2['State']):
df_5_temp_3 = comp_df.where(comp_df['State'] == x)
df_5_temp_3.dropna(how='all',inplace=True)
total_complaints.append(len(df_5_temp_3))
df_5_temp_2['Total Complaints'] = total_complaints
df_5_temp_2['Hidden rate %'] = list((df_5_temp_2['hidden_count'] / df_5_temp_2['Total Complaints']) * 100)
df_5_temp_2.drop(['index'], 1, inplace=True)
df_5_temp_2
100%|██████████| 62/62 [10:27<00:00, 10.12s/it]
| State | hidden_count | Total Complaints | Hidden rate % | |
|---|---|---|---|---|
| 0 | FL | 51888 | 629941 | 8.236962 |
| 1 | TX | 48719 | 560364 | 8.694170 |
| 2 | CA | 37114 | 597278 | 6.213857 |
| 3 | NY | 29544 | 336721 | 8.774030 |
| 4 | PA | 28872 | 248582 | 11.614678 |
| ... | ... | ... | ... | ... |
| 57 | AA | 25 | 59 | 42.372881 |
| 58 | MP | 19 | 57 | 33.333333 |
| 59 | FM | 16 | 116 | 13.793103 |
| 60 | AS | 8 | 50 | 16.000000 |
| 61 | MH | 3 | 33 | 9.090909 |
62 rows × 4 columns
df_5_temp_2.sort_values('Hidden rate %', inplace=True, ascending=False)
df_5_temp_2.reset_index(inplace=True)
df_5_temp_2
| index | State | hidden_count | Total Complaints | Hidden rate % | |
|---|---|---|---|---|---|
| 0 | 56 | GU | 163 | 353 | 46.175637 |
| 1 | 54 | AP | 332 | 749 | 44.325768 |
| 2 | 51 | AE | 554 | 1298 | 42.681048 |
| 3 | 57 | AA | 25 | 59 | 42.372881 |
| 4 | 53 | VI | 361 | 987 | 36.575481 |
| ... | ... | ... | ... | ... | ... |
| 57 | 22 | AZ | 6861 | 93067 | 7.372108 |
| 58 | 41 | DC | 1660 | 22770 | 7.290294 |
| 59 | 5 | GA | 26241 | 360174 | 7.285645 |
| 60 | 2 | CA | 37114 | 597278 | 6.213857 |
| 61 | 31 | NV | 3897 | 77271 | 5.043289 |
62 rows × 5 columns
fig = plx.choropleth(df_5_temp_2, locations='State',locationmode='USA-states', color='Hidden rate %',
color_continuous_scale="Viridis",
range_color=(0, max(df_5_temp_2['Hidden rate %'])),
scope="north america",
labels={'Complaints': 'Hidden rate'}
)
fig.update_layout(
title = "Total number of Hidden ZIP code for the \"State\" in North America " ,
height = 700,
margin = dict(t=0, b=0, l=0, r=0),
font = dict(
family="Courier New, monospace",
size=11,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_5_temp_2['State'])
fig.show()
df_6 = pd.DataFrame(comp_df['Tags'].value_counts(dropna=False) )
df_6 = df_6.reset_index()
df_6['Tags'], df_6['count'] = df_6['index'], df_6['Tags']
df_6.drop(['index'], 1, inplace=True)
df_6
| Tags | count | |
|---|---|---|
| 0 | NaN | 4757083 |
| 1 | Servicemember | 286553 |
| 2 | Older American | 159849 |
| 3 | Older American, Servicemember | 39515 |
fig = plx.pie(df_6, values='count', names='Tags')
fig.update_layout(
title="Total number of complaints for the \"Tags\" " ,
margin=dict(t=0, b=0, l=0, r=0),
font=dict(
family="Courier New, monospace",
size=11,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_6['Tags'], textinfo = 'percent+text')
fig.show()
df_7 = pd.DataFrame(comp_df['Consumer consent provided?'].value_counts(dropna=False) )
df_7 = df_7.reset_index()
df_7['Consumer consent provided?'], df_7['count'] = df_7['index'], df_7['Consumer consent provided?']
df_7.drop(['index'], 1, inplace=True)
df_7
| Consumer consent provided? | count | |
|---|---|---|
| 0 | Consent not provided | 2099875 |
| 1 | Consent provided | 1838262 |
| 2 | NaN | 1041536 |
| 3 | Other | 254303 |
| 4 | Consent withdrawn | 9024 |
fig = plx.pie(df_7, values='count', names='Consumer consent provided?')
fig.update_layout(
title="Total number of complaints for the \"Consumer consent provided?\" " ,
margin=dict(t=0, b=0, l=0, r=0),
font=dict(
family="Courier New, monospace",
size=11,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_7['Consumer consent provided?'], textinfo = 'percent+text')
fig.show()
df_8 = pd.DataFrame(comp_df['Submitted via'].value_counts(dropna=False) )
df_8 = df_8.reset_index()
df_8['Submitted via'], df_8['count'] = df_8['index'], df_8['Submitted via']
df_8.drop(['index'], 1, inplace=True)
df_8
| Submitted via | count | |
|---|---|---|
| 0 | Web | 4690912 |
| 1 | Referral | 248657 |
| 2 | Phone | 181242 |
| 3 | Postal mail | 94865 |
| 4 | Fax | 25658 |
| 5 | Web Referral | 1241 |
| 6 | 425 |
fig = plx.pie(df_8, values='count', names='Submitted via')
fig.update_layout(
title="Total number of complaints for the \"Submitted via\" " ,
margin=dict(t=0, b=0, l=0, r=0),
height = 800,
width = 700,
font=dict(
family="Courier New, monospace",
size=15,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_8['Submitted via'], textinfo = 'percent+text')
fig.show()
days_taken_to_process_complaint = []
for i in tqdm(range(0, len(comp_df))):
yr = comp_df['Date sent to company'][i].split('-')[0]
mo = comp_df['Date sent to company'][i].split('-')[1]
dt = comp_df['Date sent to company'][i].split('-')[2]
yr,mo,dt
date1 = datetime(int(yr), int(mo), int(dt))
date2 = datetime(int(yr), int(mo), int(dt))
difference = date2 - date1
days_taken_to_process_complaint.append(difference.days)
comp_df['days taken to forward complaint'] = days_taken_to_process_complaint
comp_df
100%|██████████| 5243000/5243000 [01:11<00:00, 73192.78it/s]
| Date received | Product | Sub-product | Issue | Sub-issue | Consumer complaint narrative | Company public response | Company | State | ZIP code | Tags | Consumer consent provided? | Submitted via | Date sent to company | Company response to consumer | Timely response? | Consumer disputed? | Complaint ID | days taken to forward complaint | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2024-03-13 | Credit card | General-purpose credit card or charge card | Advertising and marketing, including promotion... | Didn't receive advertised or promotional terms | I formally request the removal of an inaccurat... | NaN | EQUIFAX, INC. | MO | 631XX | NaN | Consent provided | Web | 2024-03-13 | Closed with non-monetary relief | Yes | NaN | 8538710 | 0 |
| 1 | 2024-03-14 | Credit reporting or other personal consumer re... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | NaN | EQUIFAX, INC. | PA | 19142 | NaN | Consent not provided | Web | 2024-03-14 | Closed with non-monetary relief | Yes | NaN | 8551289 | 0 |
| 2 | 2023-10-25 | Credit reporting or other personal consumer re... | Credit reporting | Improper use of your report | Reporting company used your report improperly | NaN | Company has responded to the consumer and the ... | TRANSUNION INTERMEDIATE HOLDINGS, INC. | CA | 92114 | NaN | Consent not provided | Web | 2023-10-25 | Closed with explanation | Yes | NaN | 7755832 | 0 |
| 3 | 2023-10-25 | Mortgage | FHA mortgage | Struggling to pay mortgage | An existing modification, forbearance plan, sh... | NaN | Company has responded to the consumer and the ... | Specialized Loan Servicing Holdings LLC | CA | 956XX | NaN | Other | Web | 2023-10-25 | Closed with explanation | Yes | NaN | 7753166 | 0 |
| 4 | 2024-05-02 | Money transfer, virtual currency, or money ser... | Domestic (US) money transfer | Money was not available when promised | NaN | NaN | NaN | Sigue Corp. | TX | 78644 | NaN | Other | Web | 2024-05-02 | In progress | Yes | NaN | 8916876 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5242995 | 2023-08-02 | Credit reporting, credit repair services, or o... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | NaN | EQUIFAX, INC. | TX | 76036 | NaN | Other | Web | 2023-08-02 | Closed with non-monetary relief | Yes | NaN | 7337991 | 0 |
| 5242996 | 2023-08-02 | Payday loan, title loan, or personal loan | Installment loan | Problem with additional add-on products or ser... | NaN | NaN | NaN | NBT BANCORP INC. | NJ | 08046 | NaN | NaN | Phone | 2023-08-02 | Closed with explanation | Yes | NaN | 7343156 | 0 |
| 5242997 | 2023-08-02 | Mortgage | FHA mortgage | Struggling to pay mortgage | NaN | NaN | Company believes it acted appropriately as aut... | Chesmar Homes | TX | 77316 | NaN | Other | Web | 2023-08-17 | Closed with explanation | Yes | NaN | 7340709 | 0 |
| 5242998 | 2023-08-02 | Credit reporting, credit repair services, or o... | Credit reporting | Improper use of your report | Reporting company used your report improperly | NaN | NaN | EQUIFAX, INC. | GA | 30013 | NaN | Consent not provided | Web | 2023-08-02 | Closed with explanation | Yes | NaN | 7341166 | 0 |
| 5242999 | 2023-08-04 | Payday loan, title loan, or personal loan | Payday loan | Charged fees or interest you didn't expect | NaN | NaN | NaN | Flandreau Santee Sioux Tribe | TX | 75057 | NaN | Consent not provided | Web | 2023-08-18 | Closed with explanation | Yes | NaN | 7350142 | 0 |
5243000 rows × 19 columns
df_9 = pd.DataFrame(comp_df['days taken to forward complaint'].value_counts(dropna=False) )
df_9 = df_9.reset_index()
df_9['days taken to forward complaint'], df_9['count'] = df_9['index'], df_9['days taken to forward complaint']
df_9.drop(['index'], 1, inplace=True)
df_9
| days taken to forward complaint | count | |
|---|---|---|
| 0 | 0 | 5243000 |
fig = plx.pie(df_9, values='count', names='days taken to forward complaint')
fig.update_layout(
title="Total number of complaints for the \"days taken to forward complaint\" " ,
margin=dict(t=0, b=0, l=0, r=0),
font=dict(
family="Courier New, monospace",
size=15,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_9['days taken to forward complaint'], textinfo = 'percent+text')
fig.show()
df_10 = pd.DataFrame(comp_df['Company response to consumer'].value_counts(dropna=False) )
df_10 = df_10.reset_index()
df_10['Company response to consumer'], df_10['count'] = df_10['index'], df_10['Company response to consumer']
df_10.drop(['index'], 1, inplace=True)
df_10
| Company response to consumer | count | |
|---|---|---|
| 0 | Closed with explanation | 3473372 |
| 1 | Closed with non-monetary relief | 1330021 |
| 2 | In progress | 233378 |
| 3 | Closed with monetary relief | 154043 |
| 4 | Closed without relief | 17868 |
| 5 | Closed | 17611 |
| 6 | Untimely response | 11388 |
| 7 | Closed with relief | 5304 |
| 8 | NaN | 15 |
fig = plx.pie(df_10, values='count', names='Company response to consumer')
fig.update_layout(
title="Total number of complaints for the \"Company response to consumer\" " ,
margin=dict(t=0, b=0, l=0, r=0),
font=dict(
family="Courier New, monospace",
size=15,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_10['Company response to consumer'], textinfo = 'percent+text')
fig.show()
df_11 = pd.DataFrame(comp_df['Timely response?'].value_counts(dropna=False) )
df_11 = df_11.reset_index()
df_11['Timely response?'], df_11['count'] = df_11['index'], df_11['Timely response?']
df_11.drop(['index'], 1, inplace=True)
df_11
| Timely response? | count | |
|---|---|---|
| 0 | Yes | 5184323 |
| 1 | No | 58677 |
fig = plx.pie(df_11, values='count', names='Timely response?')
fig.update_layout(
title="Total number of complaints for the \"Timely response?\" " ,
margin=dict(t=0, b=0, l=0, r=0),
height = 450,
font=dict(
family="Courier New, monospace",
size=15,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_11['Timely response?'], textinfo = 'percent+text')
fig.show()
df_12 = pd.DataFrame(comp_df['Consumer disputed?'].value_counts(dropna=False) )
df_12 = df_12.reset_index()
df_12['Consumer disputed?'], df_12['count'] = df_12['index'], df_12['Consumer disputed?']
df_12.drop(['index'], 1, inplace=True)
df_12
| Consumer disputed? | count | |
|---|---|---|
| 0 | NaN | 4474684 |
| 1 | No | 619938 |
| 2 | Yes | 148378 |
fig = plx.pie(df_12, values='count', names='Consumer disputed?')
fig.update_layout(
title="Total number of complaints for the \"Consumer disputed?\" " ,
margin=dict(t=0, b=0, l=0, r=0),
height = 450,
font=dict(
family="Courier New, monospace",
size=15,
color="RebeccaPurple"
)
)
fig.update_traces(text = df_12['Consumer disputed?'], textinfo = 'percent+text')
fig.show()
# longest continuous increasing subsequence (subarray).
"""
--> The variable 'i' in the 1st 'for' loop returns the starting index of the required subarray.
--> The variable 'j' in the 2nd 'for' lopp returns the ending index of the required subarray.
--> For example, given array = [1,3,5,4,7].
Possible sub array (1st) = [1,3,5,4,7], 2nd = [3,5,4,7], 3rd = [5,4,7], 4th= [4,7].
--> The approach to find continuous increasing subsequence from the sub array are,
For instance we can consider 1st sub array[1,3,5,4,7]. We have to check one by one like
(1,3), (3,5), (5,4), (4,7). we have to break once 1st value is greater than 2nd value in the pair.
Here the increasing pair count is 2 for this paricular sub array.
--> The same way we have to check for possible subarray in a list. At each iteration the count is updated.
--> The longest continuous possible pair is (1,3) and (3,5). The subarray should be [1,3,5],
The longth of longest continuous increasing subsequence is possible pair count + 1, which is 2(pairs) + 1 = 3
--> If the array is in descending order or if an array contains same values ([2,2,2,2,2]). Then, the longest
continuous increasing subsequence should be 1, which is [2].
"""
arr = [1,3,5,4,7]
#arr = [2,2,2,2,2]
long_incr_subseq = 0
for i in range(0, len(arr)-1):
count = 0
new_i = i
for j in range(i+1, len(arr)):
curr_val, next_val= arr[new_i], arr[j]
if curr_val < next_val:
count += 1
else:
break
new_i += 1
if count > long_incr_subseq:
long_incr_subseq = count
if long_incr_subseq > 0:
long_incr_subseq += 1
print(long_incr_subseq)
elif long_incr_subseq == 0:
print(long_incr_subseq + 1)
3
# Largest possible number from the list
"""
--> permutations gives us the possible combinations.
--> Converting from string to interger, and then maximum of the array gives us the largest possible number.
"""
from itertools import permutations
arr_3 = [3,30,34,5,9]
possible_out = list(permutations(arr_3, len(arr_3)))
possible_out_int = []
for x in possible_out:
x = ''.join([str(q) for q in list(x)])
possible_out_int.append(int(x))
print(max(possible_out_int))
9534330
# Store all the "servlet-name", and "servlet-class" in a csv file
"""
--> The json file is saved in the current location of this notebook.
--> 'json' package is used to import and read the required JSON file.
--> "file['web-app']['servlet']" is used to access servlet key of the file(json) which holds all the servlet name and class.
--> The accessed servlet names and classes were stored in the list.
--> 'pandas' library is used to convert the list into dataframe. Then, dataframe is converted into csv file and stored
in a current location.
-->
"""
import json
import pandas as pd
with open('DT A1 sample_json (1) (1) (3).json', 'r') as f:
file = json.load(f)
servlet_name, servlet_class = [], []
for x in file['web-app']['servlet']:
servlet_name.append(x['servlet-name'])
servlet_class.append(x['servlet-class'])
df = pd.DataFrame()
df['servlet-name'], df['servlet-class'] = servlet_name, servlet_class
df.to_csv('servlet_name_class.csv',index=False)
df
| servlet-name | servlet-class | |
|---|---|---|
| 0 | cofaxCDS | org.cofax.cds.CDSServlet |
| 1 | cofaxEmail | org.cofax.cds.EmailServlet |
| 2 | cofaxAdmin | org.cofax.cds.AdminServlet |
| 3 | fileServlet | org.cofax.cds.FileServlet |
| 4 | cofaxTools | org.cofax.cms.CofaxToolsServlet |